
#################################
#################################
#   Differential Expression (DE) analyses
#################################
#################################

# Differential expression (DE) via one linear model per feature.
#
# Fits one lm() with all chosen covariates (predictors followed by confounders) to every
# column of `normalized`, then reports results separately for each variable of interest.
#
# Arguments:
#   normalized: normalized data matrix; each column is used as the response of one lm() fit,
#               so rows are samples (in the same order as the rows of annot) and column
#               names identify the features
#   predictors: names of predictors in annot data frame
#   confounders: names of confounders in annot data frame
#   predictor.var.type: vector parallel to predictors; the values tested for in this
#               function are "categorical" and "continuous"
#   annot: annotation data frame
#   confounder.var.type: kept for interface compatibility; not read by this function
#   sampleannot.referencelevels: vector named by covariate giving the reference level of
#               each categorical covariate
#   pval.adjustment: one of "BY","BH","Bonf","none"; when a vector is supplied (including
#               the default) only its first element is used
#   prb.sets: optional membership matrix indexed [feature, set] with 1 marking membership;
#               used to list the sets each feature belongs to
#   saveresults: if TRUE, per-term result tables and volcano plots are written; it also
#               selects the "genes"/"pathways" suffix of the summary csv files
#   path.to.DE.results: directory receiving the per-term tables, plots and missing-data note
#   log: log file that progress and diagnostic output is appended to
#   plottypearg: vector of plot types, each passed on to drawplot()
#   path.results: pattern replaced by "results" in the plot file name written to the html include
#   path.inc: directory holding status.js and panel2_2.js, which are appended to
#   path.to.csvs: directory receiving the covariate / ests / ses / pvals csv files
#   probe.annotation.id.column: column of prb.annots that is merged against the probe IDs
#   prb.annots: probe annotation data frame, indexed by probe ID in its row names, with
#               columns "Probe.Label" and "Analyte.Type"
#
# Value: a list with out.df.list and fdr.cutoff.list (per-term plotting data; empty lists when
#   nothing was saved), ests, ses, pvals, pvals.adjusted (feature x model-term matrices),
#   npredictors (number of leading model-term columns that belong to predictors), Pathways,
#   pval.adjustment, DE.warnings.paragraph, sig.gene.sets, failed.run, newtermnames and
#   newtermnames.linebreak.
#
# Stops with an error when the model leaves no residual degrees of freedom.
# Relies on drawplot(), correct.filename() and codecols2, which are defined outside this function.
run.DE.analyses = function(normalized,predictors,confounders,predictor.var.type,annot,
                           confounder.var.type,sampleannot.referencelevels,pval.adjustment = c("BY","BH","Bonf","none"),
                           prb.sets=NULL,saveresults=TRUE,path.to.DE.results,log,plottypearg,path.results,path.inc,path.to.csvs, probe.annotation.id.column,prb.annots)
{
  # The default is the whole vector of choices; the scalar tests below need exactly one value.
  # Taking the first element reproduces what older R versions did implicitly.
  if(length(pval.adjustment)>1){pval.adjustment = pval.adjustment[1]}
  use.adjusted = is.element(pval.adjustment,c("BH","BY", "Bonf"))
  # p.adjust() method for each supported choice:
  adjust.method = c(BY="BY",BH="BH",Bonf="bonferroni",none="none")

  status.file = paste(path.inc,"//status.js",sep="")
  DE.warnings.paragraph = ""
  print("Starting differential expression (DE) analysis")
  cat("LOG:Starting differential expression (DE) analysis",file=log,sep='\n\n',append=TRUE)
  cat("document.write('<p>Starting differential expression (DE) analysis</p>');", file=status.file,append=TRUE)
  
  # remove samples that are missing values from any covariate:
  samples.with.NAs = rowSums(is.na(annot[,c(predictors,confounders),drop=FALSE]))>0
  
  saveresults1 = "genes"
  if(!saveresults){saveresults1="pathways"}
  covariates = as.data.frame(annot[,c(predictors,confounders)])
  names(covariates) = c(predictors,confounders)
  covariate.names = dimnames(covariates)[[2]]

  predictor = c(rep(TRUE,length(predictors)),rep(FALSE,length(confounders))) # vector indicating which variable is a predictor
  cat("predictor variable:",file=log,sep='\n',append=TRUE)
  suppressWarnings(write.table(predictor,file=log,sep='\t',append=TRUE))

  ### evaluate missingness
  missingobs = sum(rowSums(is.na(covariates))>0)
  missingpervariable = colSums(is.na(covariates))
  line1 = line2 =  ""
  if(missingobs>0)
  {
    line1 = paste("There are",missingobs,"samples missing data from at least one covariate.  They will be removed from the differential expression analysis.")
  }
  for(i in seq_along(missingpervariable))
  {
    if(missingpervariable[i]>0)
    {
      line2 = paste(line2,paste("\n",covariate.names[i],"is missing",missingpervariable[i],"observations.  They will be removed from the differential expression analysis."),sep="")
    }
  }
  write.table(paste(line1,"\n",line2),file=paste(path.to.DE.results,"//Missing data warning.txt",sep=""),
              row.names=FALSE,col.names=FALSE)
  DE.warnings.paragraph = paste(DE.warnings.paragraph,line1,"\n",line2)
  write.csv(covariates,file=paste(path.to.csvs,"//DE - covariates",saveresults1,".csv",sep=""))  
  
  # trial fit on the first feature, used to learn which model terms lm() retains:
  temp1 = normalized[,1]
  temp = cbind(temp1,covariates)[!samples.with.NAs,]

  # report the terms in the final model, and define more informative result names:
  termsinmodel = c()
  newtermnames = newtermnames.linebreak = c()
  for(i in seq_len(ncol(temp))[-1])   # column 1 is the response
  {
    covname = colnames(temp)[i]
    if(is.numeric(temp[,i]))
    {
      termsinmodel=c(termsinmodel,covname)
      newtermnames=c(newtermnames,covname)
      newtermnames.linebreak=c(newtermnames.linebreak,covname)
    }
    if(is.factor(temp[,i]))
    {
      # lm() names factor terms <covariate><level>
      observed.levels = unique(temp[,i])
      termsinmodel=c(termsinmodel,paste(covname,observed.levels,sep=""))
      newtermnames=c(newtermnames,paste(covname,": differential expression in ",observed.levels," vs. baseline of ",sampleannot.referencelevels[covname],sep=""))
      newtermnames.linebreak=c(newtermnames.linebreak,paste(covname,": ",observed.levels," vs.",sampleannot.referencelevels[covname],sep=""))
    }
  }
  names(newtermnames)=termsinmodel
  names(newtermnames.linebreak)=termsinmodel
  
  cat("Terms entered into linear model:",file=log,sep='\n',append=TRUE)
  cat(termsinmodel,file=log,sep='\n\n',append=TRUE)
  
  # the model formula is the same for every feature, so build it once:
  form = paste("temp1~")
  for(k in seq_len(dim(covariates)[2])){form = paste(form,"+",covariate.names[k])}
  # enter the formula in lm():
  lm1 = lm(form,data=temp)
  # one result column per retained coefficient, intercept excluded:
  ests = matrix(NA,dim(normalized)[2],dim(summary(lm1)$coefficients)[1]-1)
  dimnames(ests)[[1]] = dimnames(normalized)[[2]]; dimnames(ests)[[2]] = dimnames(summary(lm1)$coefficients)[[1]][-1]

  failed.run=FALSE  
  # if ran out of DOF of residuals, don't return results:
  if(df.residual(lm1)==0)
  {
    failed.run.message <- "\nWarning: There are too many parameters to estimate given the number of samples.   DE analyses will not be run.  Possible solutions: select fewer covariates for DE, or combine/reduce levels for categorical covariates, or increase the number of samples and rerun"
    cat(failed.run.message,file=log,sep='\n',append=TRUE)
    stop(failed.run.message)
  }
  
  # if a reference level is lost, don't return results:
  lostreflevel = FALSE
  for(i in seq_along(predictors))
  {
    if(predictor.var.type[i]=="categorical")
    {
      includedlevels = lm1$xlevels[[predictors[i]]]
      if(!is.element(sampleannot.referencelevels[predictors[i]],includedlevels)){lostreflevel=TRUE}
    }
  }
  
  if(lostreflevel)
  {
    cat("Warning: No values of a variable's reference level were eligible for the linear model.  DE analyses will not be run.  Examine the sample annotation file and re-run the analysis.",file=log,sep='\n',append=TRUE)
    failed.run=TRUE
    saveresults = FALSE
  } 

  # all result matrices start as NA copies of ests; pvals.adjusted is only filled in for
  # predictor columns, and only when results are saved
  pvals.adjusted = ses = pvals = ests
  active.variables=as.list(dimnames(ests)[[2]])
  Gene.sets = c()
  for(j in seq_len(dim(ests)[1]))
  {
    temp1=normalized[,j]
    temp = cbind(temp1,covariates)
    lm1 = lm(form,data=temp)
    mod = summary(lm1)$coefficients[-1,]
    # a single remaining coefficient comes back as a vector; restore the 1-row matrix shape
    if(is.vector(mod)){mod = t(as.matrix(mod))}
    
    ests[j,] = mod[,1]
    ses[j,] = mod[,2]
    pvals[j,] = mod[,4]
    # and get pathway memberships:
    if(length(prb.sets)>0)
    {
      Gene.sets[j] = paste(dimnames(prb.sets)[[2]][prb.sets[dimnames(ests)[[1]][j],]==1],collapse=", ")
    }
  }
  
  ## now identify the columns corresponding to predictors vs. confounders, removing columns that lm() has disregarded due to linear dependency:
  npredictors=0
  cat("predictor.var.type:",file=log,sep='\n',append=TRUE)
  suppressWarnings(write.table(predictor.var.type,file=log,sep='\t',append=TRUE))
  
  for(i in seq_len(sum(predictor)))
  {
    covname = covariate.names[i]
    suppressWarnings(write.table(covname,file=log,sep='\t',append=TRUE))
    cat("dimnames(covariates)[[2]][i]:",file=log,sep='\n',append=TRUE)
    cat(covname,file=log,sep='\n',append=TRUE)
    cat(nchar(covname),file=log,sep='\n',append=TRUE)
    cat(nchar(covname),file=log,sep='\n',append=TRUE)
    # a model term belongs to this covariate when it starts with the covariate's name
    # NOTE(review): this prefix match also catches other covariates whose names merely begin with this name
    short=lapply(active.variables, function(x) substr(x,1,nchar(covname)))
    cat("short:",file=log,sep='\n',append=TRUE)
    suppressWarnings(write.table(short,file=log,sep='\n',append=TRUE))
    nactive = length(short[short == covname])
    cat("active.levels:",file=log,sep='\n',append=TRUE)
    cat(nactive,file=log,sep='\n',append=TRUE)
    
    if(predictor.var.type[i]=="categorical"){
      if(nactive<1){
        warning(paste("Warning: ",covname," is redundant with another covariate in the data matrix.  It will be removed from all subsequent analyses.", sep=""))
        cat(paste("LOG:Warning: ",covname," is redundant with another covariate in the data matrix.  It will be removed from all subsequent analyses.", sep=""),file=log,sep='\n',append=TRUE)
        cat(paste("document.write('<p>Warning: ",covname," is redundant with another covariate in the data matrix.  It will be removed from all subsequent analyses.</p>');",sep=""), file=status.file,append=TRUE)
        DE.warnings.paragraph = paste(DE.warnings.paragraph,"Warning: ",covname," is redundant with another covariate in the data matrix.  It will be removed from all subsequent analyses.")
      }else if(nactive<(length(levels(covariates[,i]))-1)){
        warning(paste("Warning: ",covname," has levels that are redundant with another covariate in the data matrix.  They will be removed from all subsequent analyses.", sep=""))
        cat(paste("LOG:Warning: ",covname," has levels that are redundant with another covariate in the data matrix.  They will be removed from all subsequent analyses.", sep=""),file=log,sep='\n',append=TRUE)
        cat(paste("document.write('<p>Warning: ",covname," has levels that are redundant with another covariate in the data matrix.  They will be removed from all subsequent analyses.</p>');",sep=""), file=status.file,append=TRUE)   
        DE.warnings.paragraph = paste(DE.warnings.paragraph,"Warning: ",covname,"has levels that are redundant with another covariate in the data matrix.  They will be removed from all subsequent analyses.")
        npredictors=npredictors+nactive
      }else{
        npredictors=npredictors+nactive
        cat("npredictors:",file=log,sep='\n',append=TRUE)
        cat(npredictors,file=log,sep='\n',append=TRUE)
      }
    }
    if(predictor.var.type[i]=="continuous"){
      if(nactive<1){
        warning(paste("Warning: ",covname," is redundant with another covariate in the data matrix.  It will be removed from all subsequent analyses.", sep=""))
        cat(paste("LOG:Warning: ",covname," is redundant with another covariate in the data matrix.  It will be removed from all subsequent analyses.", sep=""),file=log,sep='\n',append=TRUE)
        cat(paste("document.write('<p>Warning: ",covname," is redundant with another covariate in the data matrix.  It will be removed from all subsequent analyses.</p>');",sep=""), file=status.file,append=TRUE)
        DE.warnings.paragraph = paste(DE.warnings.paragraph,"Warning: ",covname," is redundant with another covariate in the data matrix.  It will be removed from all subsequent analyses.")
      }else{
        npredictors=npredictors+1
        cat("npredictors:",file=log,sep='\n',append=TRUE)
        cat(npredictors,file=log,sep='\n',append=TRUE)
      }
    }
  }
  
  # rename the results with more informative names (used as volcano plot titles):
  newnames = newtermnames[colnames(ests)]
  
  # created unconditionally because they are part of the return value even when nothing is saved
  out.df.list     <- list()
  fdr.cutoff.list <- list()

  ##### begin save results
  
  if(saveresults)  
  {    
    cat("npredictors:",file=log,sep='\n',append=TRUE)
    suppressWarnings(write.table(npredictors,file=log,sep='\t',append=TRUE))
    cat("head(pvals):",file=log,sep='\n',append=TRUE)
    suppressWarnings(write.table(head(pvals),file=log,sep='\t',append=TRUE))

    # plotting symbols / base colours, one per analyte type (in order of first appearance)
    # NOTE(review): only two base colours are defined; a third analyte type would get an NA colour
    unfilled.Shapes <- c(1,2,0,5)
    filled.Shapes   <- c(16,17,15,18)
    color.Analyte   <- c("chartreuse", "magenta")

    # one results table and one volcano plot per predictor term
    # (predictor terms occupy the first npredictors columns):
    for(i in seq_len(npredictors))
    {
      termfilename = make.names(dimnames(ests)[[2]][i])

      # write output tables for predictors
      out = cbind(ests[,i],ses[,i], ests[,i]-1.96*ses[,i],ests[,i]+1.96*ses[,i],pvals[,i])
      dimnames(out)[[2]] = c("Log2 fold change","std error", "Lower confidence limit","Upper confidence limit","P-value")
      extracolumn=FALSE

      if(is.element(pval.adjustment,names(adjust.method)))
      {
        extracolumn=TRUE
        pvals.a = p.adjust(pvals[,i],method=adjust.method[[pval.adjustment]])
        pvals.adjusted[,i]=pvals.a
        adjusted.name = "Unadj.p.value"
        if(pval.adjustment!="none"){adjusted.name = paste(toupper(pval.adjustment), ".p.value", sep = "")}
        out = cbind(out,pvals.a); dimnames(out)[[2]][dim(out)[2]] = adjusted.name
      }

      out = data.frame(signif(out,3),check.names = FALSE); out$Gene.sets <- Gene.sets

      # order by p-val:
      out = out[order(pvals[,i]),]
      out$probe.ID <- rownames(out)
      rownames(out) <- paste(prb.annots[rownames(out),"Probe.Label"],prb.annots[rownames(out),"Analyte.Type"],sep = "-")
      write.csv(out,file=correct.filename(paste(path.to.DE.results,"//DE results - ",termfilename,".csv",sep="")))  
      
      # and write a top hits version (at most 20 rows; never padded with NA rows):
      out2 = out[seq_len(min(20,nrow(out))),]
      out2[,1:4]=signif(out2[,1:4],3)
      out2[,5]=signif(out2[,5],4)
      if(extracolumn){out2[,6]=signif(out2[,6],4)}
      
      write.csv(out2,file=correct.filename(paste(path.to.DE.results,"//DE results - Top 20 - ",termfilename,".csv",sep="")))
      
      ##### volcano plots:

      out.df           <- as.data.frame(out)
      out.df           <- merge(out.df, prb.annots, by.x = "probe.ID", by.y = probe.annotation.id.column, all = FALSE)
      rownames(out.df) <- paste(prb.annots[out.df$probe.ID,"Probe.Label"],prb.annots[out.df$probe.ID,"Analyte.Type"],sep = "-")
		
      out.df$volcanocol = rep("black",dim(out.df)[1])
      out.df$volcanocol[out.df[,"P-value"] < 0.010] = codecols2[1]
      out.df$volcanocol[out.df[,"P-value"] < 0.001] = codecols2[2]

      out.df$volcanopch = rep(1,dim(out.df)[1])
      out.df$volcanopch[out.df[,"P-value"] < 0.010] = 16
      out.df$volcanopch[out.df[,"P-value"] < 0.001] = 16
      
      if(use.adjusted)
      {
        # translate each adjusted-p threshold into the largest raw p-value that meets it,
        # so the cutoff can be drawn on the raw p-value axis (0 = nothing meets the threshold)
        adj.col = paste(toupper(pval.adjustment), ".p.value", sep = "")
        adj.thresholds = c(0.01,0.05,0.10,0.50)
        fdr.cutoffs <- c(0,0,0,0)
        for(k in seq_along(adj.thresholds))
        {
          below.cutoff <- which(out.df[,adj.col] < adj.thresholds[k])
          if(length(below.cutoff)>0) fdr.cutoffs[k] = max(0, max(out.df[below.cutoff,"P-value"]))
        }
        print(fdr.cutoffs)
        fdr.cutoff.list[[i]] <- fdr.cutoffs

        # filled symbol below the loosest cutoff; colour shade 4..1 from loosest to strictest
        pch.threshold  = fdr.cutoffs[4]
        col.thresholds = fdr.cutoffs[4:1]
        col.suffix     = c("4","3","2","1")
      }else{
        # without adjusted p-values fall back to fixed raw p-value thresholds
        pch.threshold  = 0.010
        col.thresholds = c(0.010,0.001)
        col.suffix     = c("4","1")
      }

      # designate shapes and colors for each analyte in volcano plots
      analyte.types = unique(out.df$Analyte.Type)
      out.df$volcanocol <- rep("azure4",dim(out.df)[1])
      for(a in seq_along(analyte.types))
      {
        is.analyte = out.df[,"Analyte.Type"] == analyte.types[a]
        out.df[is.analyte & out.df[,"P-value"] >= pch.threshold,"volcanopch"] = unfilled.Shapes[a]
        out.df[is.analyte & out.df[,"P-value"] < pch.threshold,"volcanopch"] = filled.Shapes[a]
        # later (stricter) thresholds overwrite earlier ones
        for(k in seq_along(col.thresholds))
        {
          out.df[is.analyte & out.df[,"P-value"] < col.thresholds[k],"volcanocol"] = paste(color.Analyte[a], col.suffix[k], sep = "")
        }
      }

      for(r in seq_along(plottypearg))
      {
        plottype=plottypearg[r]
        tempfilename = drawplot(paste(path.to.DE.results,"//volcano plot",termfilename,sep=""),plottype,width=1.3,height=1.3)
        tempfilename=gsub(path.results,"results",tempfilename)

        # only the first plot type is linked from the html report
        if(r==1)
        {
          strTemp=paste("document.write('      	  <li>Volcano Plot - ",dimnames(pvals)[[2]][i],"<br><img src=\"",tempfilename,".png\"></img></li>');\n",sep="")
          cat(strTemp,file=paste(path.inc,"//panel2_2.js",sep=""),append=TRUE)
        }

        # allow the legends to be drawn in the top margin
        par(xpd=TRUE)
        par(mar=c(5,4,5,1))

        for(a in seq_along(analyte.types))
        {
          is.analyte = out.df[,"Analyte.Type"] == analyte.types[a]
          if(a == 1)
          {
            # the first analyte sets up the axes; break long titles after the third word
            ttl.i <- newnames[i]
            tmp <- unlist(strsplit(ttl.i,split = " "))
            if(length(tmp)>3)
              ttl.i <- paste(paste(tmp[1:3],collapse=" "),"\n",paste(tmp[-(1:3)],collapse = " "),sep="")
            plot(out.df[is.analyte,"Log2 fold change"], -log10(out.df[is.analyte,"P-value"]), 
                 xlab = "log2(fold change)", ylab = "-log10(p-value)",main=ttl.i, cex.main = 1,
                 xlim = c(min(out.df[,"Log2 fold change"]), max(out.df[,"Log2 fold change"])), 
                 ylim = c(min(-log10(out.df[,"P-value"])), max(-log10(out.df[,"P-value"]))),
                 col = out.df[is.analyte,"volcanocol"], 
                 pch = out.df[is.analyte,"volcanopch"])
          } else{
            points(out.df[is.analyte,"Log2 fold change"], -log10(out.df[is.analyte,"P-value"]), 
                   col = out.df[is.analyte,"volcanocol"],
                   pch = out.df[is.analyte,"volcanopch"])
          }
        }

        # legends first (they sit in the margin), then clip to the plot region for the cutoff lines
        if(use.adjusted)
        {
          legend("topleft",bty="n",inset=c(0,-0.16),lty=1:4,legend = paste("adj. p-value <",c("0.01","0.05","0.10","0.50")), cex = 1)
        }else{
          legend("topleft",bty="n",inset=c(0,-0.16), lty=1:2, legend = c("p-value < 0.01","p-value < 0.001"), cex = 1)
        }
        legend("topright",bty="n",inset=c(0,-0.16), legend = analyte.types, pch = filled.Shapes[seq_along(analyte.types)], 
          col = color.Analyte[seq_along(analyte.types)], cex = 1)

        par(xpd = FALSE)
        if(use.adjusted)
        {
          abline(h=-log10(fdr.cutoffs),lty=1:4)
        }else{
          abline(h=-log10(c(0.01,0.001)),lty= c(1:2))
        }

        # label the (at most) 40 smallest p-values
        by.pvalue = out.df[order(out.df[,"P-value"],decreasing=FALSE),]
        top40 = by.pvalue[seq_len(min(40,nrow(by.pvalue))),]
        text(x=top40[,"Log2 fold change"],y=-log10(top40[,"P-value"]),labels=top40[,"Probe.Label"],cex=.8)

        dev.off()
      }

      out.df.list[[i]] <- out.df      
    }
    
  } # end if(saveresults)

  print("Creating differential expression (DE) analysis results files")
  cat("LOG:Creating differential expression (DE) analysis results files",file=log,sep='\n\n',append=TRUE)
  cat("document.write('<p>Creating differential expression (DE) analysis results files</p>');", file=status.file,append=TRUE)
  
  write.csv(data.frame(ests,row.names = paste(prb.annots[rownames(ests),"Probe.Label"],prb.annots[rownames(ests),"Analyte.Type"],sep = "-")),
            file=paste(path.to.csvs,"//DE results - ests",saveresults1,".csv",sep=""))
  write.csv(data.frame(ses,row.names = paste(prb.annots[rownames(ses),"Probe.Label"],prb.annots[rownames(ses),"Analyte.Type"],sep = "-")),
            file=paste(path.to.csvs,"//DE results - ses",saveresults1,".csv",sep=""))
  write.csv(data.frame(pvals,row.names = paste(prb.annots[rownames(pvals),"Probe.Label"],prb.annots[rownames(pvals),"Analyte.Type"],sep = "-")),
            file=paste(path.to.csvs,"//DE results - pvals",saveresults1,".csv",sep=""))

  # tweak covariate names to avoid illegal characters:
  dimnames(ests)[[2]] = gsub("/",".",dimnames(ests)[[2]])
  dimnames(pvals)[[2]] = dimnames(ests)[[2]]
  dimnames(ses)[[2]] = dimnames(ests)[[2]]
  
  # finally, save lists of significant genes within each covariate:
  if(pval.adjustment!="none")
  {
    fdr.thresh = 0.10
    is.sig = pvals.adjusted<fdr.thresh
    sig.label = paste(" Adj.p.value < ",fdr.thresh,sep="")
  }else{
    p.thresh = 0.01
    is.sig = pvals.adjusted<p.thresh
    sig.label = paste(" p < ",p.thresh,sep="")
  }

  tempnames = colnames(is.sig)
  sig.gene.sets = list()
  # get a list of genes for each predictor:
  for(i in seq_along(predictors))
  {
    # relevant columns: everything starting with the predictor name:
    relevant.columns = substr(tempnames,1,nchar(predictors[i]))==predictors[i]
    if(sum(relevant.columns)>0)
    {
      # na.rm: adjusted p-values that were never computed (NA) must not count as significant
      tempgenes = rownames(is.sig)[rowSums(is.sig[,relevant.columns,drop=FALSE],na.rm=TRUE)>0]
      if(length(tempgenes)>2)
      {
        sig.gene.sets[[length(sig.gene.sets)+1]] = tempgenes
        names(sig.gene.sets)[length(sig.gene.sets)] = paste(predictors[i],sig.label,sep="")
      }
    }
  }
  
  out = list(out.df.list = out.df.list, fdr.cutoff.list = fdr.cutoff.list, 
             ests=ests,ses=ses,pvals=pvals,npredictors=npredictors,Pathways=Gene.sets,pvals.adjusted=pvals.adjusted,
             pval.adjustment=pval.adjustment,DE.warnings.paragraph=DE.warnings.paragraph,sig.gene.sets=sig.gene.sets,
             failed.run=failed.run,newtermnames=newtermnames,newtermnames.linebreak=newtermnames.linebreak)
  
  print("Finished differential expression (DE) analysis")
  cat("LOG:Finished differential expression (DE) analysis",file=log,sep='\n\n',append=TRUE)
  cat("document.write('<p>Finished differential expression (DE) analysis</p>');", file=status.file,append=TRUE)
  return(out)
}
